/* cel sort module implements functions.
 * @author chenxin <chenxin619315@gmail.com>
 * 包括 插入，希尔，归并，快速，桶式排序
 *
 * test report: 
 * 1. environment: ubuntu/2G/AMD 2.8GHZ
 * 2. result: (take 1000000 random integer as sample)
 * +------------+---------------+
 * | Algorithm	| Time(sec)	|
 * +------------+---------------+
 * | insertion	| forget it	|
 * +------------+---------------+
 * | shell		| 1.430000	|
 * +------------+---------------+
 * | merge		| 1.250000	|
 * +------------+---------------+
 * | quick		| 0.430000	|
 * +------------+---------------+
 * | bucket(1)	| 0.050000	|
 * +------------+---------------+
 */
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include "stdiolib.h"
#include "sortlib.h"

/**
* insertion sort algorithm.
*
* @a	- the array to sort.
* @size	- the bytes that one element in the array takes.
* @comp	- the compare function.
* @start- the start offset.
* @end	- the end offset(not included).
*/
void cel_subinsert_sort(void * a, unsigned int size, cel_compare_fn_t comp,
		unsigned int start, unsigned int end)
{
	unsigned char *tmp = ( unsigned char * )malloc(size);
	unsigned char *ptr = ( unsigned char * )a;
	unsigned char *p;

	register int i, j;

	if(tmp == NULL) return;

	for(i = start + 1; i <= end; i++) 
	{
		cel_mem_copy( ptr + i * size, tmp, size );

		/* find the right index for tmp */
		for(j = i; j > start && comp(tmp, (p = ptr + (j - 1) * size)) < 0; j--) 
		{
			cel_mem_copy(p, p + size, size);
		}

		/* Copy the element to the right position */
		if(j < i) 
		{
			cel_mem_copy(tmp, ptr + j * size, size);
		}
	}

	/* free the tmp allocations */
	free(tmp);
}


/**
 * 步长序列
 * gaps array for shell sort.
 * this array was generated with 
 * 9 * pow(4, j) - 9 * pow(2, j) + 1
 * and 
 * pow(4, j) - 3 * pow(2, j) + 1 .
 */
static const int gaps[] = {
	1, 5,
	19, 41,
	109, 209, 505, 929,
	2161, 8929,
	16001, 36289, 64769,			//10000th
	146305, 260609, 587521,	
	1045505, 2354689, 4188161, 9427969,	
	16764929, 37730305, 67084289,
	150958081, 268386305, 603906049,
	1073643521, 2147483647
};


/**
 * shell sort algorithm.
 *
 * @a 		- the array to sort.
 * @length	- the length of the array. 
 * @size	- the bytes that one element in the array takes.
 * @comp	- comprare function.
 */
void cel_shell_sort(void * a, unsigned int length, unsigned int size, cel_compare_fn_t comp)
{

	unsigned char *tmp = (unsigned char *)malloc(size);
	unsigned char *ptr = (unsigned char *)a;
	unsigned char *p;

	register int i, j, gap;
	register int k = 0;

	if(tmp == NULL) 
		return;

	while(gaps[k] < length) 
		k++;

	while(--k >= 0) 
	{
		/* Get the gap */
		gap = gaps[k];
		for(i = gap; i < length; i++) 
		{
			cel_mem_copy(ptr + i * size, tmp, size);
			/* find the right index of the tmp */
			for(j = i; j >= gap && comp(tmp, (p = ptr + (j - gap) * size)) < 0; j -= gap) 
			{
				cel_mem_copy(p, p + gap * size, size);
			}
			/* located the tmp */
			if (j < i)
			{
				cel_mem_copy(tmp, ptr + j * size, size);
			}
		}
	}

	/* free the temp allocations*/
	free(tmp);
}


/**
 * merge the two half array.
 *
 * @a
 * @tmp	- temp array.
 * @l	- left index.
 * @m	- middle index.
 * @r	- right index.
 */
static void merge(unsigned char * a, unsigned char * tmp, unsigned int size, cel_compare_fn_t comp,
		unsigned int l, unsigned int r, unsigned int rend )
{
	unsigned int ptmp = l;
	unsigned int lend = r - 1;
	unsigned int p = l;

	register unsigned char * ltmp, * rtmp;

	while(l <=lend && r <= rend) 
	{
		ltmp = a + l * size;
		rtmp = a + r * size;
		if (comp(ltmp, rtmp) < 0) 
		{
			/* Copy the left current one to the temp */
			cel_mem_copy(ltmp, tmp + p * size, size);
			l++;
		} 
		else 
		{
			/* Copy the right current one to the temp */
			cel_mem_copy(rtmp, tmp + p * size, size);
			r++;
		}
		p++;
	}

	/* Copy the rest of the left to the temp */
	while(l <= lend) 
	{
		cel_mem_copy(a + l * size, tmp + p * size, size);
		l++; p++;
	}

	/* Copy the rest of the right to the temp */
	while(r <= rend) 
	{
		cel_mem_copy(a + r * size, tmp + p * size, size);
		r++; p++;
	}

	/* Copy the items back from the temp to the origin one */
	while(ptmp <= rend) 
	{
		cel_mem_copy(tmp + ptmp * size, a + ptmp * size, size);
		ptmp++;
	}
}

/* merge sort algorithm */
void cel_merge_sort(void * a, unsigned int length, unsigned int size, cel_compare_fn_t comp)
{
	/* temp array */
	unsigned char *tmp = (unsigned char *)calloc(length, sizeof(void *));
	unsigned char *arr = (unsigned char *)a;

	register unsigned int offset = 2;
	register unsigned int len, i, j;

	if(tmp == NULL) return;

	/* merge sort */
	while(offset <= length) 
	{
		len = length / offset;
		for(j = 0, i = 0; j < len - 1; j++) 
		{
			/* The right end was included, so plus 1 with it */
			merge(arr, tmp, size, comp, i, i + offset / 2, i + offset - 1);
			i += offset;
		}

		/* merge the len offset. (make sure you saw the i < len - 1) */
		merge(arr, tmp, size, comp, i, i + offset / 2, i + offset - 1);
		i += offset;

		/* Check if any items left for the array */
		len = i + offset / 2;
		if (len <= length - 1) 
		{
			merge(arr, tmp, size, comp, i, i + offset / 2, length - 1);
		}

		/* Double size the offset */
		offset += offset;
	}

	/* Check and merge the rest of the left items */
	offset /= 2;
	if (offset < length) 
	{
		merge(arr, tmp, size, comp, 0, offset, length - 1);
	}

	/* free the temp array */
	free(tmp);
}

/* internal function to get the median privot */
static unsigned char * median3(unsigned char * arr, unsigned int size, cel_compare_fn_t comp, unsigned int left, unsigned int right)
{
	unsigned char *lp = arr + left * size;
	unsigned char *cp = arr + ( (left + right) / 2 ) * size;
	unsigned char *rp = arr + right * size;

	if (comp(lp, cp) > 0)
		cel_mem_swap(lp, cp, size);

	if (comp(lp, rp) > 0)
		cel_mem_swap(lp, rp, size);

	if (comp( cp, rp) > 0)
		cel_mem_swap(cp, rp, size);

	/* move the pivot to the right - 1 positon */
	cel_mem_swap(cp, rp - size, size);

	return rp - size;
}

/* internal static function to do the partition */
static void quicksort(unsigned char * arr, unsigned int size, cel_compare_fn_t comp, unsigned int left, unsigned int right) 
{
	if(left + 11 <= right) 
	{
		/* get the privot of the subarray */
		unsigned char * pivot = median3( arr, size, comp, left, right );

		/* start partitioning */
		register unsigned int i = left, j = right - 1;
		for(; ;) 
		{
			while(comp(arr + (++i * size), pivot) < 0) ;
			while(comp(arr + (--j * size), pivot) > 0) ;

			if(i < j) 
				cel_mem_swap(arr + i * size, arr + j * size, size);
			else
				break;
		}

		/* swap the privot back to the small colleciton */
		cel_mem_swap(arr + i * size, pivot, size);

		quicksort(arr, size, comp, left, i - 1);
		quicksort(arr, size, comp, i, right);
	} 
	else 
	{
		/* if the number of the items is less than CUTOFF use insertion sort instead */
		cel_subinsert_sort(arr, size, comp, left, right);
	}
}

/**
 * quick sort algorithm.
 */
void cel_quick_sort(void * a, unsigned int length, unsigned int size, cel_compare_fn_t comp)
{
	unsigned char * arr = (unsigned char *)a;
	quicksort(arr, size, comp, 0, length - 1);
}



/**
 * bucket sort algorithm. yeah, this is fast, but also got lots of limits.
 * it applicable only for simple, unsigned integer (it could actually).
 * and the occurrence of the same numeric should be less than 2^(b - 1) .
 *
 * @a		- the array to be sort.
 * @length	- the length of the array.
 * @bits	- bits for one bucket,
 * 	it depened the occurrence of the same numeric.
 */
void cel_bucket_sort(unsigned int * a, unsigned int length, unsigned int bits)
{
	register unsigned int i;
	unsigned int __bytes;
	unsigned char * key;
	unsigned int idx;
	int v;

	/* find the max item in the array */
	unsigned int max = a[0];
	for (i = 1; i < length; i++)
	{
		if(a[i] > max) 
			max = a[i];
	}	

	/* make the allocations accorading to the max */
	__bytes = ( max * bits + CHAR_BIT - 1 ) / CHAR_BIT;
	key = (unsigned char *)calloc(__bytes, sizeof(char));

	if(key == NULL) 
		return;
	memset(key, 0x00, __bytes);

	/* Set the bits. (sorting) */
	for(i = 0; i < length; i++) 
	{
		v = 0;
		cel_mem_store(key, a[i] * bits, bits, &v, 0UL);
		v++;
		cel_mem_store(&v, 0UL, bits, key, a[i] * bits);
	}

	idx = 0;
	for(i = 0; i <= max; i++) 
	{
		v = 0;
		cel_mem_store(key, i * bits, bits, &v, 0UL);
		if(v > 0)
		{
			while(--v >= 0) 
				a[idx++] = i;
		}
	}

	/* free the allocations */
	free( key );
}
